
###
### DECENT LIVING STANDARDS MAPPING 
### 

###
### DESCRIPTIVE GRAPHS 
### 

rm(list=ls())

##
## PACKAGES --------------------------------------------------------------
##

library(tidyverse)
library(countrycode)
library(stringr)
library(data.table)
library(texreg)
library(sandwich)
library(margins)

##
## LOADING DATA ----------------------------------------------------------
##

load(file="Complete DLS data file_regional level.RData")
dls <- dls_region_urbanrural

##
## DATA PREPARATION ------------------------------------------------------
## 

#> converting countryname to continent/region


table(dls$worldregion, useNA = "always")

recode_na = function(., threshold_NA){
  . = ifelse(. <=threshold_NA, ., NA)
  .
}


##
## SIMPLE PREDICTION MODELS TO PLOT TIME TRENDS --------------------------
## 

## Can merge South Asia and East Asia and then do it separately for Asia, LAc, and SSA
table(dls$wave)
table(dls$rural)
table(dls$wave,   dls$year)

dls <- dls %>% 
  mutate(year = cut(as.numeric(year),
                               breaks=c(-Inf,1994,1999,2004,2009,2014,Inf)))
table(dls$wave,   dls$year)

#dls.region <- within(dls.region, wave <- relevel(as.factor(wave), ref = 1))

dls$year <- relevel(as.factor(dls$year), "(2009,2014]")

mean_access <- dls %>%
  filter(year=="(2009,2014]" & rural == 0) %>% 
  summarize(dim1_housing_mean = mean(dim1_housing_region, na.rm = T),
            dim2_thermal_mean = mean(dim2_thermal_region, na.rm = T),
            dim3_nutrition_mean = mean(dim3_nutrition_region, na.rm = T),
            dim4_foodprep_mean = mean(dim4_foodprep_region, na.rm = T),
            dim5_water_mean = mean(dim5_water_region, na.rm = T),
            dim6_sanitation_mean = mean(dim6_sanitation_region, na.rm = T),
            dim7_health_mean = mean(dim7_health_region, na.rm = T),
            dim8_education_mean = mean(dim8_education_region, na.rm = T),
            dim9_socialconnect_mean = mean(dim9_socialconnect_region, na.rm = T),
            dim10_physicalconnect_mean = mean(dim10_physicalconnect_region, na.rm = T),
            dls_min7_region_mean = mean(dls_min7_region, na.rm=T),
            dls_min10_region_mean = mean(dls_min10_region, na.rm=T))

tab <- list(
  lm(dim1_housing_region ~ as.factor(year)*rural+
       country_name,
     data=dls),
  lm(dim2_thermal_region ~ as.factor(year)*rural+
       country_name,
     data=dls),
  lm(dim3_nutrition_region ~as.factor(year)*rural+
       country_name,
     data=dls),
  lm(dim4_foodprep_region ~ as.factor(year)*rural+
       country_name,
     data=dls),
  lm(dim5_water_region ~ as.factor(year)*rural+
       country_name,
     data=dls),
  lm(dim6_sanitation_region ~ as.factor(year)*rural+
       country_name,
     data=dls),
  lm(dim7_health_region ~ as.factor(year)*rural+
       country_name,
     data=dls),
  lm(dim8_education_region ~ as.factor(year)*rural+
       country_name,
     data=dls),
  lm(dim9_socialconnect_region ~ as.factor(year)*rural+
       country_name,
     data=dls),
  lm(dim10_physicalconnect_region ~ as.factor(year)*rural+
       country_name,
     data=dls),
  lm(dls_min7_region ~ as.factor(year)*rural+
       country_name,
     data=dls),
  lm(dls_min10_region ~ as.factor(year)*rural+
       country_name,
     data=dls)
)


d0 <- as.data.frame(tab[[c(11,1)]])
d0 <- d0 %>% rename("coef"="tab[[c(11, 1)]]")
d0 <- rownames_to_column(d0, var="variable")
d0 <- d0 %>% filter(grepl("country", variable)==F)
d0[1,2] <- mean_access[1,11]

d0 <- d0 %>% mutate(">2/3 of DLS dimensions" =
                      case_when(variable=="(Intercept)" ~ d0[1,2],
                                grepl("rural", variable)==F & grepl("year", variable)==T~ d0[1,2] + coef, 
                                grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d0[1,2] + d0[2,2] + d0[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d0[1,2] + d0[3,2] + d0[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d0[1,2] + d0[4,2] + d0[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d0[1,2] + d0[5,2] + d0[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d0[1,2] + d0[6,2] + d0[7,2]+ coef,
                                variable=="rural" ~ d0[1,2]+d0[7,2])) 

d0[8,3] <- 0

d00 <- as.data.frame(tab[[c(12,1)]])
d00 <- d00 %>% rename("coef"="tab[[c(12, 1)]]")
d00 <- rownames_to_column(d00, var="variable")
d00 <- d00 %>% filter(grepl("country", variable)==F)
d00[1,2] <- mean_access[1,12]

d00 <- d00 %>% mutate(DLS10_min =
                      case_when(variable=="(Intercept)" ~ d00[1,2],
                                grepl("rural", variable)==F & grepl("year", variable)==T~ d00[1,2] + coef, 
                                grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d00[1,2] + d00[2,2] + d00[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d00[1,2] + d00[3,2] + d00[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d00[1,2] + d00[4,2] + d00[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d00[1,2] + d00[5,2] + d00[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d00[1,2] + d00[6,2] + d00[7,2]+ coef,
                                variable=="rural" ~ d00[1,2]+d00[7,2]))


d1 <- as.data.frame(tab[[c(1,1)]])
d1 <- d1 %>% rename("coef"="tab[[c(1, 1)]]")
d1 <- rownames_to_column(d1, var="variable")
d1 <- d1 %>% filter(grepl("country", variable)==F)
d1[1,2] <- mean_access[1,1]

d1 <- d1 %>% mutate(Housing=
                      case_when(variable=="(Intercept)" ~ d1[1,2],
                                grepl("rural", variable)==F & grepl("year", variable)==T~ d1[1,2] + coef, 
                                grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d1[1,2] + d1[2,2] + d1[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d1[1,2] + d1[3,2] + d1[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d1[1,2] + d1[4,2] + d1[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d1[1,2] + d1[5,2] + d1[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d1[1,2] + d1[6,2] + d1[7,2]+ coef,
                                variable=="rural" ~ d1[1,2]+d1[7,2]))
                    
d2 <- as.data.frame(tab[[c(2,1)]])
d2 <- d2 %>% rename("coef"="tab[[c(2, 1)]]")
d2 <- rownames_to_column(d2, var="variable")
d2 <- d2 %>% filter(grepl("country", variable)==F)
d2[1,2] <- mean_access[1,2]

d2 <- d2 %>% mutate(Thermal=
                      case_when(variable=="(Intercept)" ~ d2[1,2],
                                grepl("rural", variable)==F & grepl("year", variable)==T~ d2[1,2] + coef, 
                                grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d2[1,2] + d2[2,2] + d2[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d2[1,2] + d2[3,2] + d2[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d2[1,2] + d2[4,2] + d2[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d2[1,2] + d2[5,2] + d2[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d2[1,2] + d2[6,2] + d2[7,2]+ coef,
                                variable=="rural" ~ d2[1,2]+d2[7,2]))

d3 <- as.data.frame(tab[[c(3,1)]])
d3 <- d3 %>% rename("coef"="tab[[c(3, 1)]]")
d3 <- rownames_to_column(d3, var="variable")
d3 <- d3 %>% filter(grepl("country", variable)==F) 
d3[1,2] <- mean_access[1,3]

d3 <- d3 %>% mutate(Nutrition=
                      case_when(variable=="(Intercept)" ~ d3[1,2],
                                grepl("rural", variable)==F & grepl("year", variable)==T~ d3[1,2] + coef, 
                                grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d3[1,2] + d3[2,2] + d3[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d3[1,2] + d3[3,2] + d3[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d3[1,2] + d3[4,2] + d3[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d3[1,2] + d3[5,2] + d3[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d3[1,2] + d3[6,2] + d3[7,2]+ coef,
                                variable=="rural" ~ d3[1,2]+d3[7,2]))

d4 <- as.data.frame(tab[[c(4,1)]])
d4 <- d4 %>% rename("coef"="tab[[c(4, 1)]]")
d4 <- rownames_to_column(d4, var="variable")
d4 <- d4 %>% filter(grepl("country", variable)==F) 
d4[1,2] <- mean_access[1,4]

d4 <- d4 %>% mutate(Food=
                      case_when(variable=="(Intercept)" ~ d4[1,2],
                                grepl("rural", variable)==F & grepl("year", variable)==T~ d4[1,2] + coef, 
                                grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d4[1,2] + d4[2,2] + d4[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d4[1,2] + d4[3,2] + d4[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d4[1,2] + d4[4,2] + d4[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d4[1,2] + d4[5,2] + d4[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d4[1,2] + d4[6,2] + d4[7,2]+ coef,
                                variable=="rural" ~ d4[1,2]+d4[7,2]))

d5 <- as.data.frame(tab[[c(5,1)]])
d5 <- d5 %>% rename("coef"="tab[[c(5, 1)]]")
d5 <- rownames_to_column(d5, var="variable")
d5 <- d5 %>% filter(grepl("country", variable)==F)
d5[1,2] <- mean_access[1,5]

d5 <- d5 %>% mutate(Water=
                      case_when(variable=="(Intercept)" ~ d5[1,2],
                                grepl("rural", variable)==F & grepl("year", variable)==T~ d5[1,2] + coef, 
                                grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d5[1,2] + d5[2,2] + d5[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d5[1,2] + d5[3,2] + d5[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d5[1,2] + d5[4,2] + d5[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d5[1,2] + d5[5,2] + d5[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d5[1,2] + d5[6,2] + d5[7,2]+ coef,
                                variable=="rural" ~ d5[1,2]+d5[7,2]))

d6 <- as.data.frame(tab[[c(6,1)]])
d6 <- d6 %>% rename("coef"="tab[[c(6, 1)]]")
d6 <- rownames_to_column(d6, var="variable")
d6 <- d6 %>% filter(grepl("country", variable)==F)
d6[1,2] <- mean_access[1,6]

d6 <- d6 %>% mutate(Sanitation=
                      case_when(variable=="(Intercept)" ~ d6[1,2],
                                grepl("rural", variable)==F & grepl("year", variable)==T~ d6[1,2] + coef, 
                                grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d6[1,2] + d6[2,2] + d6[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d6[1,2] + d6[3,2] + d6[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d6[1,2] + d6[4,2] + d6[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d6[1,2] + d6[5,2] + d6[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d6[1,2] + d6[6,2] + d6[7,2]+ coef,
                                variable=="rural" ~ d6[1,2]+d6[7,2]))

d7 <- as.data.frame(tab[[c(7,1)]])
d7 <- d7 %>% rename("coef"="tab[[c(7, 1)]]")
d7<- rownames_to_column(d7, var="variable")
d7 <- d7 %>% filter(grepl("country", variable)==F) 
d7[1,2] <- mean_access[1,7]

d7 <- d7 %>% mutate(Health=
                      case_when(variable=="(Intercept)" ~ d7[1,2],
                                grepl("rural", variable)==F & grepl("year", variable)==T~ d7[1,2] + coef, 
                                grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d7[1,2] + d7[2,2] + d7[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d7[1,2] + d7[3,2] + d7[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d7[1,2] + d7[4,2] + d7[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d7[1,2] + d7[5,2] + d7[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d7[1,2] + d7[6,2] + d7[7,2]+ coef,
                                variable=="rural" ~ d7[1,2]+d7[7,2]))

d8 <- as.data.frame(tab[[c(8,1)]])
d8 <- d8 %>% rename("coef"="tab[[c(8, 1)]]")
d8 <- rownames_to_column(d8, var="variable")
d8 <- d8 %>% filter(grepl("country", variable)==F) 
d8[1,2] <- mean_access[1,8]

d8 <- d8 %>% mutate(Education=
                      case_when(variable=="(Intercept)" ~ d8[1,2],
                                grepl("rural", variable)==F & grepl("year", variable)==T~ d8[1,2] + coef, 
                                grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d8[1,2] + d8[2,2] + d8[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d8[1,2] + d8[3,2] + d8[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d8[1,2] + d8[4,2] + d8[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d8[1,2] + d8[5,2] + d8[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d8[1,2] + d8[6,2] + d8[7,2]+ coef,
                                variable=="rural" ~ d8[1,2]+d8[7,2]))

d9 <- as.data.frame(tab[[c(9,1)]])
d9 <- d9 %>% rename("coef"="tab[[c(9, 1)]]", )
d9 <- rownames_to_column(d9, var="variable")
d9 <- d9 %>% filter(grepl("country", variable)==F) 
d9[1,2] <- mean_access[1,9]

d9 <- d9 %>% mutate(Social=
                      case_when(variable=="(Intercept)" ~ d9[1,2],
                                grepl("rural", variable)==F & grepl("year", variable)==T~ d9[1,2] + coef, 
                                grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d9[1,2] + d9[2,2] + d9[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d9[1,2] + d9[3,2] + d9[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d9[1,2] + d9[4,2] + d9[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d9[1,2] + d9[5,2] + d9[7,2]+ coef,
                                grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d9[1,2] + d9[6,2] + d9[7,2]+ coef,
                                variable=="rural" ~ d9[1,2]+d9[7,2]))

d10 <- as.data.frame(tab[[c(10,1)]])
d10 <- d10 %>% rename("coef"="tab[[c(10, 1)]]")
d10 <- rownames_to_column(d10, var="variable")
d10 <- d10 %>% filter(grepl("country", variable)==F) 
d10[1,2] <- mean_access[1,10]

d10 <- d10 %>% mutate(Physical=
                        case_when(variable=="(Intercept)" ~ d10[1,2],
                                  grepl("rural", variable)==F & grepl("year", variable)==T~ d10[1,2] + coef, 
                                  grepl("rural", variable)==T & grepl("1994]", variable)==T ~ d10[1,2] + d10[2,2] + d10[7,2]+ coef,
                                  grepl("rural", variable)==T & grepl("1999]", variable)==T ~ d10[1,2] + d10[3,2] + d10[7,2]+ coef,
                                  grepl("rural", variable)==T & grepl("2004]", variable)==T ~ d10[1,2] + d10[4,2] + d10[7,2]+ coef,
                                  grepl("rural", variable)==T & grepl("2009]", variable)==T ~ d10[1,2] + d10[5,2] + d10[7,2]+ coef,
                                  grepl("rural", variable)==T & grepl("Inf]", variable)==T ~ d10[1,2] + d10[6,2] + d10[7,2]+ coef,
                                  variable=="rural" ~ d10[1,2]+d10[7,2]))

d <- d0 %>% 
  left_join(d1, by=c("variable")) %>%
  left_join(d2, by=c("variable")) %>% 
  left_join(d3, by=c("variable")) %>% 
  left_join(d4, by=c("variable")) %>% 
  left_join(d5, by=c("variable")) %>% 
  left_join(d6, by=c("variable")) %>% 
  left_join(d7, by=c("variable")) %>% 
  left_join(d8, by=c("variable")) %>% 
  left_join(d9, by=c("variable")) %>% 
  left_join(d10, by=c("variable"))

d <- d %>% 
  mutate(rural = ifelse(grepl("rural", variable)==T, 1,0))

d <- d %>% 
  select(!contains("coef")) %>% 
  mutate(variable = ifelse(variable %in% c("(Intercept)","rural"),"(2009,2014]",variable ))

d$survey_year_aux <- as.numeric(gsub("\\D", "", d$variable))


d <- d %>% mutate(
  order = case_when(survey_year_aux == 1994 ~ 1L,
                              survey_year_aux == 19941999 ~ 2L,
                              survey_year_aux == 19992004 ~ 3L,
                              survey_year_aux == 20042009 ~ 4L, 
                              survey_year_aux == 20092014 ~ 5L,
                              survey_year_aux == 2014 ~ 6L
  ))


d <- d %>% 
  pivot_longer(cols = ">2/3 of DLS dimensions":Physical, names_to = "dimension", values_to = "share")


##
## LINE PLOT - DEVELOPMENTS OVER TIME ------------------------------------------
## 


g1 <- d %>% 
  mutate(order2 = recode(dimension,
                         ">2/3 of DLS dimensions"=0L,
                        "Housing"=1L,
                        "Thermal"=2L,
                        "Nutrition"=3L,
                        "Food"=4L,
                        "Water"=5L,
                        "Sanitation"=6L,
                        "Health"=7L,
                        "Education"=8L,
                        "Social"=9L,
                        "Physical"=10L)) %>% 
  mutate(dimension = recode(dimension,
                         "Nutrition"="Food & nutrition",
                         "Food"="Food preparation")) %>% 
  ggplot() + 
  geom_line(mapping=aes(y=share, 
                        x=order, 
                        color=as.factor(rural)),
            alpha=0.7,
            stat = "identity",
            linewidth=1.2) +
  geom_point(mapping=aes(y=share, 
                         x=order, 
                         color=as.factor(rural)),
             alpha=0.7,
             stat = "identity")+
  ylab("% population with access to DLS services")+xlab("")+
  scale_y_continuous(labels=scales::percent)+
  scale_x_continuous(breaks=1:6, 
  labels=c("1990-1994", "1995-1999", "2000-2004", "2005-2009", "2010-2014", "2015-2021"))+
  scale_color_manual(name="",
                     values=c("#0038e0", "#e01409"),
                     labels=c( "Urban", "Rural"))+
  facet_wrap(facets = ~fct_reorder(dimension, order2), nrow=3)+
  theme_bw()+
  theme(legend.position = "bottom",
        legend.text = element_text(size=14),
        axis.text=element_text(size=12),
        axis.title = element_text(size=14),
        axis.text.x = element_text(angle=70,hjust=1),
        strip.background =element_rect(fill="white"),
        strip.text = element_text(size=12, colour = 'black', face='bold'),
        legend.box.background = element_rect(colour = "Grey"))+
  ggtitle("")

g1

ggsave(plot=g1, filename = "figure 2a.png",
       width = 12, height = 7)
save(g1, file = "figure 2a.RData")

##
## SAVING MODEL TABLE ----------------------------------------------------------
## 

m1 <- lm(dim1_housing_region ~ as.factor(year)*rural+
     country_name, data=dls)

m2 <- lm(dim2_thermal_region ~ as.factor(year)*rural+
     country_name, data=dls)

m3 <- lm(dim3_nutrition_region ~as.factor(year)*rural+
     country_name,
   data=dls)

m4 <- lm(dim4_foodprep_region ~ as.factor(year)*rural+
     country_name,
   data=dls)

m5 <- lm(dim5_water_region ~ as.factor(year)*rural+
     country_name,
   data=dls)

m6 <- lm(dim6_sanitation_region ~ as.factor(year)*rural+
     country_name,
   data=dls)

m7 <- lm(dim7_health_region ~ as.factor(year)*rural+
     country_name,
   data=dls)

m8 <- lm(dim8_education_region ~ as.factor(year)*rural+
     country_name,
   data=dls)

m9 <- lm(dim9_socialconnect_region ~ as.factor(year)*rural+
     country_name,
   data=dls)

m10 <- lm(dim10_physicalconnect_region ~ as.factor(year)*rural+
     country_name,
   data=dls)

m0 <- lm(dls_min7_region ~ as.factor(year)*rural+
           country_name,
         data=dls)

library(stargazer)
models <- stargazer(m0, m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,
                    intercept.bottom= FALSE, 
                    intercept.top = TRUE, 
                    report=('vcsp'),
                    omit=c("country.name"), 
                    type="text", out="supplementary table S4.html")

summary(mean_access)
